#In this project, we build a image classifier to classify phenotypes corresponding to MSL2 ‘territories’,‘speckles’ and ‘intermediates’ across 12 different genotypes.

Read in and visualize subset of images for each of the three classes

Test image classifier

  1. List images

Visualize a subset of nuclei from each class using EBI package. Default readTIFF option returns scaled 0-1 grayscale values, as.is=T returns raw values - raw values appear to be 16 bit (max value is 65280 .ie. 2^16 -256)

Note: Verify which channel signal will be used for classification. For this set, GFP (channel 2) is the appropriate channel

## Extract categories - 
Train_categories <- unique(gsub("_.*","",gsub("Images_Train/","",list_files)))

## Display 6 nuclei from each cateogory

image_raw <- lapply(list_files, function(i) readTIFF(i,as.is = T)[,,2])

i <- 1
j <- 1

set.seed(12554)

par(mfrow=c(2,3)) ## adjust margins based on number of images
for(i in 1:length(Train_categories)){
  
  my_category_images <- list_files[grep(Train_categories[i],list_files)]
  my_category_images_sub <- my_category_images[sample.int(length(my_category_images),6)]
  
  for (j in 1:6){
   image(readTIFF(my_category_images_sub[j],as.is=T)[,,2],col=gray.colors(256), main=paste(Train_categories[i],"_",j),asp=1)
   axis(1,0:1,0:1) 
  }
  
  
}

Extract GLCM

Now extract Gray-Level Co-occurrence Matrix (GLCM) features for whole image. Also segment and calculate object level features. Representative image, histogram of signal distribution and segmented image for each of Intermediate, Speckle and Territory classes are displayed.

res_method <- as.data.frame(matrix(nrow=length(list_files),ncol=46))

#Test case images

i <- 3 #Intermediate
i <- 167 #speckles
i <- 374 #territory


for (i in 1:length(list_files)){
  
  y <- readImage(list_files[i])[,,2]
  imagename <- gsub("_.*","",gsub(".*/","",list_files[i]))
  
  #### for visualization purposes in test cases -- Histogram lets you define background intensity for object detection 
  if (i %in% c(3,167,374)){
    EBImage::display(y)
    hist(as.vector(imageData(y)),main=imagename) 
  }
    
  
  #### Compute Haralick features for entire image
  x <- Image(1,dim=dim(y))   
  res_method[i,1:26] <- colMeans(computeFeatures.haralick(x, y))

  ### Add mean intensity of image
  res_method[i,27] <- mean(imageData(y))
  
  
  ### Detect objects by thresholding on gaussian blurred images and compute additional object specific features
  y_2 <- gblur(y,sigma=0.5)  ### mild blurring - sigma must be selected appropriately !
  
  #EBImage::display(y_2,all=T)
  cutoff <- 0.3 ## background cutoff selected based on histogram      
  x <- y_2 > cutoff 
  
  if (i %in% c(3,167,374)){
    EBImage::display(x,all=T)
  }
  
  #if no objects detected, then object feature is 0
  if(sum(imageData(x))==0){
    res_method[i,28:46] <- 0
  } else {
    res_method[i,28:46] <- c(computeFeatures.moment(x,y), computeFeatures.shape(x), computeFeatures.basic(x,y))

  }

  rownames(res_method)[i] <- gsub("_RGB.tif","",gsub("Images_Train/","",list_files[i]))
}

Each row of output dataframe represents an image, each column is the image-averaged extracted feature. Outcome variable is the class label

head(res_method)
##                                                           V1       V2        V3
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.08855262 1.777328 0.7577260
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.05843461 2.642324 0.7717139
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.14894853 2.173823 0.8104294
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.14921056 1.867110 0.7574788
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.07663609 1.687390 0.7737413
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.06976166 2.359195 0.8030911
##                                                         V4        V5       V6
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 4.668012 0.6750258 5.387328
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    6.787309 0.5831728 8.102039
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   6.733544 0.7175006 5.283571
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      4.849375 0.7164898 4.487800
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 4.728897 0.6424870 6.082627
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    6.990576 0.6329554 6.168019
##                                                         V7        V8       V9
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 32.33825 0.9778461 1.278933
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    69.53804 1.0998163 1.517621
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   39.28764 0.9792376 1.246613
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      26.47547 0.8898718 1.147063
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 38.83597 1.0222149 1.359136
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    47.53571 1.0756128 1.424631
##                                                        V10       V11       V12
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 1.777328 0.5092955 0.2278099
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    2.642324 0.5878146 0.1938215
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   2.173823 0.5073276 0.3366114
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      1.867110 0.4909483 0.2436463
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 1.687390 0.5180242 0.2185738
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    2.359195 0.5573914 0.2567113
##                                                         V13        V14      V15
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.5293558 0.07772315 2.879286
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.5272450 0.06060207 3.999657
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.6294582 0.12817032 3.731799
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.5221130 0.13038258 2.475062
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.5325514 0.06678650 2.321121
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.5853827 0.06199452 4.193571
##                                                         V16      V17       V18
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.6243313 4.832214 0.6225742
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.6542558 6.784126 0.5557300
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.6896816 7.012856 0.6732657
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.6500609 4.536417 0.6770442
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.6974503 4.835933 0.5979742
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.6944256 7.861785 0.5935076
##                                                        V19      V20       V21
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 5.520714 33.02087 0.9851549
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    8.178228 69.46976 1.0836587
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   5.375343 39.51140 0.9945011
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      4.555111 24.98735 0.9059002
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 6.254002 40.39662 1.0219847
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    6.358571 51.05604 1.0857665
##                                                        V22      V23       V24
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 1.329097 2.879286 0.5779118
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    1.522363 3.999657 0.6329055
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   1.299336 3.731799 0.5758876
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      1.197645 2.475062 0.5448614
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 1.410643 2.321121 0.5676343
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    1.477456 4.193571 0.6304734
##                                                         V25       V26
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.1766458 0.4764845
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.1658622 0.4905949
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.2755093 0.5829002
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.1861049 0.4667834
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.1698007 0.4798931
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.2063121 0.5367891
##                                                          V27      V28      V29
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.07502884 34.77017 34.75766
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.11667160 45.90038 21.23337
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.07237144 31.37827 16.20754
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.06055882 26.75443 37.59454
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.08585621 26.39859 35.02931
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.08656737 23.10168 15.28314
##                                                        V30       V31
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 17.04385 0.8851290
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    26.12551 0.8855931
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   17.64074 0.5695780
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      25.04194 0.9088548
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 30.95548 0.8838904
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    20.75218 0.7579586
##                                                           V32 V33 V34      V35
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 -0.23467400  41   7 1.009265
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    -0.06850603 107  43 4.220496
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   -0.33112465  62  32 3.765024
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      -0.36565479  36   2 0.500000
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 -0.59981352  37   1 0.000000
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    -0.15170294  93  49 4.673231
##                                                         V36       V37      V38
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.5361964 0.5150788 1.921946
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    1.6349564 0.5929092 7.063032
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   1.5638702 0.2576941 6.200113
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.0000000 0.5000000 0.500000
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.0000000 0.0000000 0.000000
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    1.6108684 0.3421032 7.224865
##                                                         V39        V40
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.3899570 0.08509916
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.3995235 0.08038216
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.4208729 0.09880836
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.3992375 0.09724866
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.3616322 0.05569866
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.3889943 0.08320161
##                                                          V41       V42
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.06976941 0.2792157
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.09302588 0.2823529
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.09302588 0.2823529
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.05814118 0.2509804
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.04651294 0.2823529
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.06976941 0.2980392
##                                                         V43       V44       V45
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.2980392 0.3607843 0.5843137
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.2980392 0.3921569 0.5333333
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.3137255 0.4078431 0.5990196
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.2980392 0.3764706 0.5490196
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.3074510 0.3450980 0.4705882
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.2980392 0.3764706 0.5647059
##                                                         V46
## Intermediate_0001-0021_rox2ko112_gfp_rox1d3f_1_20 0.6149020
## Intermediate_0001-0023_rox2ko112_gfp_A3B0_2_19    0.5951373
## Intermediate_0001-0023_rox2ko17_gfp_rox1d3f_1_8   0.7188627
## Intermediate_0001-0041_rox2ko17_gfp_A0B0_2_5      0.7209804
## Intermediate_0001-0064_rox2ko112_gfp_rox1d3c_1_22 0.5433725
## Intermediate_0002-0022_rox2ko112_gfp_A3B0_1_18    0.6654118

Preliminary DataViz

Test visualization the features for individual classes

## Heatmap

hm <- ComplexHeatmap::Heatmap((scale(res_method)),show_row_names = F, width=10)

labels <- as.data.frame(res_method) %>% rownames_to_column("Phenotype") %>% mutate(Phenotype=case_when(grepl("Territory",Phenotype)~"Territory",
                                                                                                       grepl("Speckles",Phenotype)~"Speckles",
                                                                                                       grepl("Intermediate",Phenotype)~"Intermediate")) %>%dplyr::select(Phenotype)
                                                                                      
lab_hm <- Heatmap(labels,row_order = row_order(hm), width=1,col = c("Grey","Red","Black"))
## Warning: The input is a data frame, convert it to a matrix.
## Warning: Note: not all columns in the data frame are numeric. The data frame
## will be converted into a character matrix.
## Warning: The heatmap has not been initialized. You might have different results
## if you repeatedly execute this function, e.g. when row_km/column_km was
## set. It is more suggested to do as `ht = draw(ht); row_order(ht)`.
hm_full <- hm + lab_hm
draw(hm_full)

## PCA 
pca_res <- prcomp(res_method,scale. = T)

ggplot2::autoplot(pca_res,data=cbind(res_method,labels),colour="Phenotype")

## Umap

umap_res <- umap(res_method)

umap_df <- cbind(as.data.frame(umap_res$layout),labels)

umap_df %>% ggplot(aes(x=V1,y=V2,col=Phenotype)) + geom_point()

  1. A clustered heatmap : Shows that speckles and territories separate quite well but ‘Intermediates’ are distributed everywhere -> Linear separation methods wont that well

  2. PCA : Shows that intermediate points are scattered in between well-separated speckles and territories

  3. UMAP : Looks funky, need to verify if highly correlated variables can cause issues with UMAP

Build ML classifier

Now test different ML approaches for classification. We will compare Logistic Regression with Elastic Nets, SVM and Boosted Decision Trees.

First, we define our train-test splits (80-20), cross validation scheme (10-fold CV) and get a sense of how correlated the predictors are so we can decide on appropriate Feature selection approaches.

set.seed(125)

res_fin <- cbind(res_method,Phenotype=as.factor(labels[,"Phenotype"]))

saveRDS(res_fin,file="241009_GLCMmetrics.rds")

#### Define train and test data splits - Use 80-20 split. 

res_split <- initial_split(res_fin,prop=0.8,strata=Phenotype)

res_training <- res_split %>% training()
res_testing <- res_split %>% testing()

res_folds <- vfold_cv(res_training,v=10,strata=Phenotype)


#### Are there any highly correlated variables ? 
res_fin %>% select_if(is.numeric) %>% corrr::correlate() %>% shave() %>% rplot(print_cor = T) + theme(axis.text.x = element_text(angle=90,hjust = 1))
## Correlation computed with
## • Method: 'pearson'
## • Missing treated using: 'pairwise.complete.obs'

Confirming what we previously saw in the heatmap, many Haralick texture features are highly correlated. We can use appropriate feature selection method depending on ML algorithm.

Now we remove variables highly correlated to other predictors (threshold of r=0.9) as well as remove Near-zero variance predictors. We don’t have to be very stringent as Regularization will shrink/minimize/eliminate the influence of correlated variables.

Note 1 : scaling/normalization is not important for simple regression algorithms. However, it is more important for distance based algorithms (SVM etc) or magnitude dependent penalty algorithms (Lasso for instance)

Note 2: Scale sets standard dev to 1, normalize sets stdev to 1 and mean to 0 -> Pick depending on application ! Non monotonic transformations like normalization can affect outcomes !

Elastic Net

## Recipe for preprocess

filt_recipe_glm <- recipe(Phenotype~.,data=res_fin) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_corr(all_numeric(),threshold=0.9)
  
## train recipe
filt_prep_glm <- filt_recipe_glm %>% prep(training=res_training)

## removes 29 highly correlated variables ! (note that if two vars are correlated to each other, only one is removed)
tidy(filt_prep_glm,number=3)
## # A tibble: 29 × 2
##    terms id        
##    <chr> <chr>     
##  1 V7    corr_gPMNQ
##  2 V9    corr_gPMNQ
##  3 V11   corr_gPMNQ
##  4 V13   corr_gPMNQ
##  5 V19   corr_gPMNQ
##  6 V20   corr_gPMNQ
##  7 V21   corr_gPMNQ
##  8 V22   corr_gPMNQ
##  9 V24   corr_gPMNQ
## 10 V26   corr_gPMNQ
## # ℹ 19 more rows
### Bake recipe to obtain clean training data - Optional to visualize transformed training data 
filt_bake_glm <- filt_prep_glm %>% bake(new_data=NULL)

Our preprocessing removes 29 variables !

Now we build the classifier. Optimal parameters for penalty and mixture(between lasso and ridge) are found through grid search

#### Define model 

res_model_glmnet <- multinom_reg(penalty = tune(),mixture=tune()) %>% set_mode("classification") %>% set_engine("glmnet")

res_model_glmnet_workflow <- workflow() %>% add_recipe(filt_recipe_glm) %>% add_model(res_model_glmnet) 

penalty_grid <- grid_regular(penalty(range=c(-3,-1)),mixture(range=c(0,1)),levels=c(mixture=5,penalty=20))   ## 100 models !

glmnet_grid <- tune_grid(res_model_glmnet_workflow,resamples = res_folds,grid=penalty_grid)


autoplot(glmnet_grid,metric="roc_auc")

Looks like regularization penalty close to 0 works best, while mixture (Prop of Lasso Penalty) is optimal at 1 .ie. mixture=1 meaning ridge regression.

Now we select best model based on ROC_AUC and collect metrics

best_vals <- glmnet_grid %>% select_best(metric="accuracy") ## select best based on roc_auc or accuracy - result is similar ! 

### finalize model 
res_model_metrics <- finalize_workflow(res_model_glmnet_workflow,best_vals) %>% fit_resamples(resamples=res_folds) %>% collect_metrics()

print(res_model_metrics)
## # A tibble: 3 × 6
##   .metric     .estimator  mean     n std_err .config             
##   <chr>       <chr>      <dbl> <int>   <dbl> <chr>               
## 1 accuracy    multiclass 0.777    10 0.0204  Preprocessor1_Model1
## 2 brier_class multiclass 0.168    10 0.00843 Preprocessor1_Model1
## 3 roc_auc     hand_till  0.908    10 0.0110  Preprocessor1_Model1

Resampled roc_auc is ~91% (ratio of true positives to false positives), accuracy is fairly low with 78%. Note that in previous tests we had achieved higher accuracies with better quality (.ie. higher resolution) images - Also verify training labels again !

Apply model on whole dataset to get final metrics. Output df_res object contains original class labels, predicted class labels as well as class specific probabilities. (Note that row order corresponds to same rows defined in the res_fin object containing file names)

res_model_glmnet_final <- finalize_workflow(res_model_glmnet_workflow,best_vals) %>% last_fit(res_split)   ### final fit to train-test split

res_model_glmnet_final %>% collect_metrics() 
## # A tibble: 3 × 4
##   .metric     .estimator .estimate .config             
##   <chr>       <chr>          <dbl> <chr>               
## 1 accuracy    multiclass     0.815 Preprocessor1_Model1
## 2 roc_auc     hand_till      0.915 Preprocessor1_Model1
## 3 brier_class multiclass     0.164 Preprocessor1_Model1
## res_model_final %>% extract_workflow()   ### This retrieves the model object for future use !

df_res <-data.frame(res_model_glmnet_final %>% extract_workflow() %>% predict(res_fin),res_model_glmnet_final %>% extract_workflow() %>% predict(res_fin,type="prob"),res_fin$Phenotype)  

print(df_res)
##      .pred_class .pred_Intermediate .pred_Speckles .pred_Territory
## 1   Intermediate       6.256659e-01   2.975573e-01    7.677671e-02
## 2   Intermediate       7.590915e-01   9.865113e-02    1.422574e-01
## 3   Intermediate       6.511714e-01   3.034147e-01    4.541389e-02
## 4   Intermediate       6.356624e-01   1.165913e-01    2.477463e-01
## 5   Intermediate       5.355880e-01   5.529285e-02    4.091191e-01
## 6   Intermediate       6.696319e-01   1.022333e-01    2.281348e-01
## 7   Intermediate       5.777025e-01   1.315914e-01    2.907061e-01
## 8   Intermediate       7.749291e-01   1.180196e-01    1.070513e-01
## 9   Intermediate       5.180655e-01   3.596811e-01    1.222534e-01
## 10  Intermediate       5.317792e-01   3.979316e-01    7.028914e-02
## 11  Intermediate       6.813782e-01   1.957112e-01    1.229106e-01
## 12  Intermediate       4.995264e-01   4.537030e-01    4.677061e-02
## 13  Intermediate       5.570694e-01   1.281301e-01    3.148005e-01
## 14  Intermediate       7.023743e-01   1.416337e-01    1.559921e-01
## 15     Territory       1.998480e-01   3.920204e-03    7.962318e-01
## 16  Intermediate       6.550931e-01   6.168515e-02    2.832218e-01
## 17  Intermediate       6.350683e-01   2.461447e-01    1.187871e-01
## 18      Speckles       1.411484e-01   8.573432e-01    1.508417e-03
## 19      Speckles       1.070218e-01   8.924559e-01    5.222768e-04
## 20     Territory       1.827601e-01   1.133181e-03    8.161067e-01
## 21     Territory       4.481823e-01   1.941812e-02    5.323995e-01
## 22  Intermediate       7.218091e-01   7.276706e-02    2.054238e-01
## 23  Intermediate       7.385910e-01   7.677511e-03    2.537315e-01
## 24      Speckles       4.529223e-01   5.256692e-01    2.140854e-02
## 25  Intermediate       5.497300e-01   4.209291e-01    2.934098e-02
## 26      Speckles       3.875296e-01   4.528388e-01    1.596316e-01
## 27  Intermediate       5.710534e-01   3.175089e-01    1.114377e-01
## 28  Intermediate       5.328298e-01   9.322264e-02    3.739476e-01
## 29  Intermediate       5.873464e-01   3.819178e-01    3.073582e-02
## 30  Intermediate       5.193787e-01   4.502562e-01    3.036507e-02
## 31  Intermediate       6.771456e-01   4.519962e-02    2.776548e-01
## 32  Intermediate       6.924128e-01   1.247991e-01    1.827882e-01
## 33  Intermediate       6.702736e-01   3.078211e-01    2.190526e-02
## 34  Intermediate       6.341335e-01   2.416717e-02    3.416994e-01
## 35  Intermediate       5.512392e-01   2.804028e-03    4.459568e-01
## 36      Speckles       3.551550e-02   9.644157e-01    6.880387e-05
## 37     Territory       3.695187e-01   1.308774e-02    6.173936e-01
## 38  Intermediate       7.705033e-01   1.137950e-01    1.157017e-01
## 39  Intermediate       8.575302e-01   7.641109e-02    6.605872e-02
## 40      Speckles       4.321159e-01   5.530248e-01    1.485931e-02
## 41  Intermediate       5.418591e-01   5.538020e-02    4.027607e-01
## 42     Territory       4.732694e-01   4.467940e-02    4.820512e-01
## 43      Speckles       2.917847e-01   6.742492e-01    3.396616e-02
## 44  Intermediate       5.274035e-01   4.366180e-01    3.597851e-02
## 45  Intermediate       5.754522e-01   3.740979e-01    5.044990e-02
## 46  Intermediate       6.173031e-01   1.390299e-01    2.436670e-01
## 47  Intermediate       4.972821e-01   5.536371e-02    4.473542e-01
## 48  Intermediate       6.883942e-01   2.387093e-01    7.289658e-02
## 49  Intermediate       5.263523e-01   3.812101e-01    9.243761e-02
## 50  Intermediate       5.405966e-01   3.736254e-01    8.577805e-02
## 51  Intermediate       6.911764e-01   1.845428e-01    1.242808e-01
## 52  Intermediate       6.155366e-01   7.149645e-02    3.129669e-01
## 53  Intermediate       7.124565e-01   8.327972e-03    2.792155e-01
## 54  Intermediate       6.211602e-01   3.296203e-01    4.921958e-02
## 55  Intermediate       6.343192e-01   2.655479e-01    1.001329e-01
## 56     Territory       4.532037e-01   5.447584e-02    4.923205e-01
## 57     Territory       3.237415e-01   2.566082e-02    6.505976e-01
## 58  Intermediate       6.819858e-01   9.708059e-02    2.209336e-01
## 59  Intermediate       7.605391e-01   1.506759e-02    2.243933e-01
## 60  Intermediate       4.868609e-01   2.715210e-01    2.416181e-01
## 61      Speckles       3.637066e-01   6.154911e-01    2.080230e-02
## 62  Intermediate       7.421402e-01   7.420915e-02    1.836507e-01
## 63  Intermediate       6.723360e-01   2.426605e-01    8.500355e-02
## 64  Intermediate       6.453945e-01   4.621845e-02    3.083871e-01
## 65  Intermediate       7.395603e-01   5.901486e-02    2.014248e-01
## 66  Intermediate       7.972195e-01   7.291827e-02    1.298622e-01
## 67  Intermediate       7.864926e-01   1.600472e-01    5.346017e-02
## 68  Intermediate       5.434465e-01   4.315161e-01    2.503738e-02
## 69  Intermediate       7.176793e-01   1.047724e-01    1.775483e-01
## 70  Intermediate       5.224806e-01   4.468186e-01    3.070074e-02
## 71     Territory       4.218110e-01   2.564620e-02    5.525428e-01
## 72  Intermediate       6.003483e-01   2.970959e-01    1.025557e-01
## 73     Territory       6.818259e-02   1.123977e-03    9.306934e-01
## 74  Intermediate       6.545468e-01   3.102911e-01    3.516211e-02
## 75  Intermediate       5.742594e-01   3.767826e-01    4.895805e-02
## 76     Territory       3.699739e-01   1.015013e-02    6.198760e-01
## 77  Intermediate       6.772891e-01   1.130829e-01    2.096280e-01
## 78  Intermediate       5.826202e-01   3.584594e-01    5.892036e-02
## 79  Intermediate       6.578401e-01   2.105794e-02    3.211020e-01
## 80     Territory       4.265757e-01   4.056551e-02    5.328587e-01
## 81  Intermediate       6.943420e-01   5.969921e-02    2.459588e-01
## 82  Intermediate       7.276358e-01   1.030036e-01    1.693605e-01
## 83  Intermediate       7.168753e-01   2.081137e-01    7.501098e-02
## 84  Intermediate       6.198025e-01   2.762601e-02    3.525715e-01
## 85     Territory       3.156881e-01   1.095983e-02    6.733521e-01
## 86  Intermediate       5.482599e-01   3.786004e-03    4.479541e-01
## 87     Territory       1.766833e-01   4.413278e-03    8.189035e-01
## 88  Intermediate       6.713640e-01   2.196814e-01    1.089546e-01
## 89  Intermediate       6.298294e-01   3.431362e-01    2.703433e-02
## 90  Intermediate       5.587627e-01   3.982602e-01    4.297708e-02
## 91  Intermediate       6.117728e-01   3.040680e-01    8.415918e-02
## 92  Intermediate       5.499502e-01   4.229064e-01    2.714333e-02
## 93      Speckles       5.656171e-02   9.430619e-01    3.763890e-04
## 94  Intermediate       7.818066e-01   6.658385e-02    1.516095e-01
## 95  Intermediate       6.424960e-01   3.068771e-01    5.062685e-02
## 96      Speckles       3.344485e-01   6.591334e-01    6.418124e-03
## 97      Speckles       1.208186e-01   8.783069e-01    8.745414e-04
## 98  Intermediate       4.970360e-01   4.612656e-01    4.169842e-02
## 99  Intermediate       7.251684e-01   1.000598e-01    1.747718e-01
## 100 Intermediate       6.467405e-01   2.963889e-01    5.687058e-02
## 101 Intermediate       6.789210e-01   5.288529e-02    2.681937e-01
## 102 Intermediate       6.091792e-01   2.007885e-01    1.900323e-01
## 103 Intermediate       6.664968e-01   1.300542e-01    2.034490e-01
## 104 Intermediate       6.842653e-01   1.352953e-01    1.804394e-01
## 105 Intermediate       5.685806e-01   2.515834e-01    1.798360e-01
## 106 Intermediate       6.391027e-01   2.829634e-01    7.793397e-02
## 107     Speckles       3.579578e-01   6.345706e-01    7.471637e-03
## 108 Intermediate       6.031296e-01   2.841393e-01    1.127311e-01
## 109 Intermediate       6.069430e-01   2.591161e-01    1.339409e-01
## 110 Intermediate       6.447673e-01   3.140782e-01    4.115450e-02
## 111     Speckles       1.755315e-01   8.184620e-01    6.006512e-03
## 112 Intermediate       5.728918e-01   1.937297e-01    2.333785e-01
## 113 Intermediate       6.726895e-01   2.352585e-01    9.205201e-02
## 114 Intermediate       5.001732e-01   4.398125e-01    6.001429e-02
## 115 Intermediate       5.113200e-01   5.519639e-02    4.334836e-01
## 116     Speckles       4.194293e-01   5.716640e-01    8.906698e-03
## 117 Intermediate       5.916851e-01   1.859596e-02    3.897189e-01
## 118     Speckles       4.368797e-01   5.122741e-01    5.084625e-02
## 119 Intermediate       5.894448e-01   2.218266e-01    1.887287e-01
## 120    Territory       1.969399e-01   6.180842e-03    7.968792e-01
## 121 Intermediate       6.909431e-01   1.504258e-01    1.586311e-01
## 122     Speckles       4.201716e-01   5.680827e-01    1.174566e-02
## 123 Intermediate       5.084905e-01   1.113565e-02    4.803739e-01
## 124     Speckles       4.098068e-01   5.868241e-01    3.369116e-03
## 125     Speckles       2.859636e-01   6.763578e-01    3.767864e-02
## 126 Intermediate       7.161693e-01   2.101324e-01    7.369830e-02
## 127     Speckles       3.361727e-01   6.522572e-01    1.157012e-02
## 128 Intermediate       6.713185e-01   2.562570e-01    7.242456e-02
## 129 Intermediate       6.941632e-01   2.347279e-01    7.110887e-02
## 130 Intermediate       6.039668e-01   2.944807e-01    1.015525e-01
## 131 Intermediate       6.532936e-01   2.209789e-02    3.246085e-01
## 132 Intermediate       5.478175e-01   6.003623e-02    3.921462e-01
## 133 Intermediate       5.353589e-01   3.961639e-01    6.847712e-02
## 134 Intermediate       6.479606e-01   3.127039e-01    3.933546e-02
## 135     Speckles       1.234695e-01   8.752859e-01    1.244633e-03
## 136     Speckles       3.367825e-03   9.966217e-01    1.045719e-05
## 137 Intermediate       4.921447e-01   4.869942e-01    2.086111e-02
## 138     Speckles       3.035066e-02   9.695363e-01    1.130823e-04
## 139     Speckles       3.344330e-01   6.573623e-01    8.204678e-03
## 140     Speckles       4.308172e-01   4.784987e-01    9.068417e-02
## 141     Speckles       4.202226e-01   4.810503e-01    9.872710e-02
## 142     Speckles       1.837860e-01   7.909559e-01    2.525810e-02
## 143     Speckles       8.712075e-02   9.120692e-01    8.100512e-04
## 144     Speckles       1.458092e-01   8.492559e-01    4.934954e-03
## 145     Speckles       2.229897e-03   9.977681e-01    2.036905e-06
## 146     Speckles       1.830110e-01   8.154462e-01    1.542848e-03
## 147     Speckles       1.223805e-01   8.764076e-01    1.211882e-03
## 148     Speckles       4.105297e-03   9.958914e-01    3.323450e-06
## 149     Speckles       8.191035e-04   9.991806e-01    2.479646e-07
## 150     Speckles       8.271193e-04   9.991725e-01    3.749410e-07
## 151     Speckles       1.301868e-03   9.986967e-01    1.453438e-06
## 152     Speckles       2.207470e-02   9.779105e-01    1.478758e-05
## 153     Speckles       3.791907e-01   6.128513e-01    7.958085e-03
## 154 Intermediate       6.182298e-01   3.555585e-01    2.621168e-02
## 155     Speckles       1.582306e-03   9.984062e-01    1.151258e-05
## 156     Speckles       1.462885e-01   8.526695e-01    1.041988e-03
## 157     Speckles       3.543527e-02   9.643999e-01    1.648567e-04
## 158     Speckles       7.237089e-02   9.226109e-01    5.018251e-03
## 159     Speckles       2.560881e-01   7.344623e-01    9.449580e-03
## 160 Intermediate       4.998961e-01   4.743073e-01    2.579657e-02
## 161     Speckles       1.036325e-02   9.894490e-01    1.877321e-04
## 162     Speckles       4.698160e-01   5.186834e-01    1.150066e-02
## 163     Speckles       3.358819e-02   9.655096e-01    9.022399e-04
## 164 Intermediate       5.911156e-01   3.654192e-01    4.346514e-02
## 165 Intermediate       7.395561e-01   1.781658e-01    8.227807e-02
## 166     Speckles       2.870576e-01   6.883752e-01    2.456719e-02
## 167     Speckles       4.464013e-02   9.552624e-01    9.742733e-05
## 168     Speckles       4.273092e-01   4.953364e-01    7.735436e-02
## 169     Speckles       2.884566e-01   6.714593e-01    4.008411e-02
## 170     Speckles       2.766211e-01   7.180332e-01    5.345713e-03
## 171     Speckles       3.582496e-02   9.639566e-01    2.184100e-04
## 172 Intermediate       5.504647e-01   2.926605e-01    1.568748e-01
## 173     Speckles       3.258318e-01   6.506697e-01    2.349850e-02
## 174     Speckles       1.157486e-01   8.805367e-01    3.714772e-03
## 175     Speckles       6.559558e-02   9.337178e-01    6.866571e-04
## 176     Speckles       7.001061e-02   9.293980e-01    5.913612e-04
## 177     Speckles       3.131387e-01   6.643021e-01    2.255919e-02
## 178     Speckles       9.883036e-02   9.005497e-01    6.199042e-04
## 179     Speckles       2.517304e-02   9.746898e-01    1.371250e-04
## 180 Intermediate       5.346863e-01   4.420694e-01    2.324429e-02
## 181    Territory       8.912594e-02   9.797052e-04    9.098944e-01
## 182     Speckles       2.013921e-03   9.979843e-01    1.786845e-06
## 183 Intermediate       5.369526e-01   3.987907e-01    6.425671e-02
## 184     Speckles       3.169487e-03   9.968300e-01    5.626013e-07
## 185     Speckles       5.119664e-02   9.484908e-01    3.125301e-04
## 186     Speckles       1.564393e-01   8.410045e-01    2.556233e-03
## 187     Speckles       3.828014e-01   6.104237e-01    6.774965e-03
## 188     Speckles       1.055445e-01   8.933488e-01    1.106714e-03
## 189 Intermediate       5.288059e-01   4.031657e-01    6.802836e-02
## 190 Intermediate       5.288059e-01   4.031657e-01    6.802836e-02
## 191    Territory       3.390955e-01   3.241300e-02    6.284915e-01
## 192     Speckles       9.697582e-02   9.026780e-01    3.462144e-04
## 193     Speckles       2.989867e-01   6.949248e-01    6.088547e-03
## 194     Speckles       1.299098e-01   8.661599e-01    3.930282e-03
## 195     Speckles       1.309196e-01   8.683564e-01    7.240282e-04
## 196     Speckles       3.032248e-02   9.692477e-01    4.298336e-04
## 197     Speckles       5.586467e-02   9.439248e-01    2.104821e-04
## 198     Speckles       2.534110e-01   7.456360e-01    9.529672e-04
## 199     Speckles       1.401415e-01   8.590129e-01    8.456267e-04
## 200 Intermediate       5.184270e-01   3.687320e-01    1.128411e-01
## 201     Speckles       1.285570e-01   8.690806e-01    2.362372e-03
## 202     Speckles       8.527764e-02   9.144738e-01    2.486033e-04
## 203 Intermediate       6.097949e-01   3.582371e-01    3.196800e-02
## 204 Intermediate       5.413062e-01   4.432842e-01    1.540965e-02
## 205     Speckles       2.949064e-01   7.015406e-01    3.552988e-03
## 206 Intermediate       6.258915e-01   1.860147e-01    1.880938e-01
## 207     Speckles       3.551111e-01   6.398983e-01    4.990534e-03
## 208     Speckles       1.753861e-01   8.231163e-01    1.497551e-03
## 209     Speckles       2.753381e-01   7.217321e-01    2.929832e-03
## 210     Speckles       2.822490e-01   7.121233e-01    5.627694e-03
## 211     Speckles       1.709056e-01   8.246265e-01    4.467837e-03
## 212     Speckles       2.504502e-01   7.441132e-01    5.436593e-03
## 213     Speckles       3.851946e-02   9.607451e-01    7.354196e-04
## 214     Speckles       1.103546e-01   8.878312e-01    1.814113e-03
## 215    Territory       1.538098e-01   2.305552e-03    8.438847e-01
## 216     Speckles       1.881062e-02   9.811731e-01    1.631209e-05
## 217     Speckles       4.362702e-01   5.111574e-01    5.257242e-02
## 218     Speckles       3.956759e-02   9.581775e-01    2.254946e-03
## 219     Speckles       2.160279e-01   7.759509e-01    8.021188e-03
## 220     Speckles       4.358824e-01   5.122267e-01    5.189093e-02
## 221     Speckles       9.152495e-02   9.082953e-01    1.797209e-04
## 222 Intermediate       5.440199e-01   4.438644e-01    1.211570e-02
## 223     Speckles       1.456237e-01   8.525751e-01    1.801224e-03
## 224     Speckles       9.447344e-02   9.050735e-01    4.530125e-04
## 225     Speckles       1.154205e-01   8.840947e-01    4.848621e-04
## 226     Speckles       4.048941e-01   5.773620e-01    1.774384e-02
## 227     Speckles       1.757120e-01   8.231793e-01    1.108611e-03
## 228     Speckles       2.858429e-01   7.117909e-01    2.366220e-03
## 229     Speckles       2.990758e-01   6.939430e-01    6.981184e-03
## 230     Speckles       3.102953e-01   6.741639e-01    1.554079e-02
## 231 Intermediate       5.875605e-01   3.662715e-01    4.616798e-02
## 232     Speckles       3.056549e-01   6.902111e-01    4.133926e-03
## 233     Speckles       3.921911e-03   9.960737e-01    4.426321e-06
## 234     Speckles       4.847154e-01   4.907257e-01    2.455897e-02
## 235     Speckles       4.813827e-01   4.984196e-01    2.019770e-02
## 236     Speckles       2.309706e-02   9.767628e-01    1.401787e-04
## 237     Speckles       4.589277e-01   5.114724e-01    2.959992e-02
## 238     Speckles       9.732837e-02   9.013359e-01    1.335748e-03
## 239     Speckles       9.899831e-02   8.982084e-01    2.793267e-03
## 240     Speckles       3.420258e-01   6.521932e-01    5.780961e-03
## 241     Speckles       1.391045e-03   9.986089e-01    8.682728e-08
## 242     Speckles       2.239920e-02   9.774641e-01    1.366696e-04
## 243 Intermediate       6.010021e-01   3.482611e-01    5.073677e-02
## 244     Speckles       2.482515e-03   9.975168e-01    7.123662e-07
## 245     Speckles       2.482968e-01   7.487955e-01    2.907647e-03
## 246     Speckles       3.083318e-01   6.733232e-01    1.834506e-02
## 247     Speckles       5.540015e-02   9.443630e-01    2.368845e-04
## 248     Speckles       3.904247e-04   9.996093e-01    2.806351e-07
## 249     Speckles       1.897711e-01   8.000899e-01    1.013897e-02
## 250     Speckles       1.323653e-01   8.628495e-01    4.785171e-03
## 251     Speckles       3.921769e-02   9.607370e-01    4.527356e-05
## 252     Speckles       2.625773e-01   7.337872e-01    3.635555e-03
## 253 Intermediate       5.851956e-01   3.853273e-01    2.947709e-02
## 254     Speckles       6.249596e-02   9.369970e-01    5.070391e-04
## 255     Speckles       1.469244e-01   8.524502e-01    6.253848e-04
## 256     Speckles       1.049407e-01   8.942172e-01    8.420096e-04
## 257     Speckles       3.849410e-02   9.612512e-01    2.546816e-04
## 258     Speckles       4.145067e-01   5.736963e-01    1.179706e-02
## 259     Speckles       8.748110e-02   9.119045e-01    6.144061e-04
## 260     Speckles       1.524191e-02   9.847411e-01    1.695814e-05
## 261     Speckles       3.719493e-01   6.105849e-01    1.746586e-02
## 262     Speckles       4.218295e-01   5.696674e-01    8.503113e-03
## 263     Speckles       2.037056e-01   7.802093e-01    1.608513e-02
## 264     Speckles       1.574402e-01   8.415312e-01    1.028610e-03
## 265     Speckles       2.028313e-02   9.789556e-01    7.612973e-04
## 266     Speckles       1.011956e-02   9.898499e-01    3.049414e-05
## 267     Speckles       1.845596e-01   8.119241e-01    3.516350e-03
## 268 Intermediate       6.901353e-01   2.181660e-01    9.169870e-02
## 269 Intermediate       5.443023e-01   4.423491e-01    1.334859e-02
## 270    Territory       1.501742e-03   2.449704e-07    9.984980e-01
## 271    Territory       1.448419e-02   1.032138e-04    9.854126e-01
## 272    Territory       1.238953e-01   6.367432e-04    8.754680e-01
## 273 Intermediate       4.729328e-01   5.976213e-02    4.673051e-01
## 274    Territory       9.751662e-03   1.417321e-05    9.902342e-01
## 275    Territory       3.371206e-01   2.234823e-02    6.405311e-01
## 276    Territory       8.721715e-03   5.683783e-06    9.912726e-01
## 277    Territory       1.153739e-02   1.691970e-05    9.884457e-01
## 278    Territory       1.273698e-01   3.813658e-04    8.722489e-01
## 279    Territory       4.011780e-02   3.234185e-05    9.598499e-01
## 280    Territory       1.017412e-01   5.914277e-04    8.976674e-01
## 281    Territory       4.722886e-01   9.122048e-03    5.185894e-01
## 282    Territory       9.898419e-03   3.020925e-05    9.900714e-01
## 283    Territory       3.222130e-01   2.817241e-02    6.496146e-01
## 284    Territory       1.341703e-02   7.954137e-06    9.865750e-01
## 285    Territory       2.995370e-01   4.670657e-03    6.957924e-01
## 286    Territory       9.098808e-03   4.499900e-06    9.908967e-01
## 287 Intermediate       4.793056e-01   1.095710e-01    4.111233e-01
## 288    Territory       1.814316e-03   4.878285e-07    9.981852e-01
## 289    Territory       2.501441e-02   2.595686e-04    9.747260e-01
## 290    Territory       4.110505e-02   5.565966e-04    9.583383e-01
## 291    Territory       2.204362e-01   1.032174e-04    7.794606e-01
## 292    Territory       3.667855e-01   1.924857e-02    6.139659e-01
## 293    Territory       1.016033e-01   9.594386e-04    8.974372e-01
## 294    Territory       8.393571e-02   1.864944e-03    9.141993e-01
## 295    Territory       3.265179e-01   3.804477e-03    6.696777e-01
## 296 Intermediate       6.786614e-01   1.609426e-01    1.603960e-01
## 297    Territory       3.273890e-01   1.269974e-02    6.599113e-01
## 298    Territory       4.100258e-01   1.701035e-02    5.729638e-01
## 299    Territory       3.250007e-01   5.800822e-03    6.691985e-01
## 300    Territory       4.877566e-02   3.185726e-04    9.509058e-01
## 301    Territory       2.868178e-01   1.303772e-02    7.001445e-01
## 302 Intermediate       6.691762e-01   1.930609e-01    1.377630e-01
## 303 Intermediate       4.884199e-01   4.673784e-02    4.648422e-01
## 304    Territory       1.292643e-01   2.083424e-03    8.686522e-01
## 305    Territory       3.387482e-01   1.909165e-02    6.421601e-01
## 306    Territory       2.990598e-01   2.822278e-02    6.727174e-01
## 307    Territory       1.406821e-01   2.153664e-03    8.571642e-01
## 308    Territory       1.949171e-01   5.124945e-03    7.999580e-01
## 309    Territory       5.847822e-02   7.320651e-04    9.407897e-01
## 310    Territory       5.446412e-02   1.988232e-04    9.453371e-01
## 311     Speckles       3.490926e-01   4.697305e-01    1.811769e-01
## 312    Territory       8.120024e-03   5.241428e-05    9.918276e-01
## 313    Territory       3.473655e-01   6.021764e-02    5.924169e-01
## 314 Intermediate       6.057083e-01   3.975479e-02    3.545369e-01
## 315    Territory       1.754903e-02   9.921960e-06    9.824410e-01
## 316 Intermediate       7.023601e-01   1.273595e-01    1.702804e-01
## 317    Territory       1.120328e-03   1.694204e-07    9.988795e-01
## 318    Territory       2.506630e-01   2.472987e-02    7.246071e-01
## 319    Territory       2.506630e-01   2.472987e-02    7.246071e-01
## 320    Territory       1.743859e-02   6.464477e-06    9.825549e-01
## 321 Intermediate       6.626297e-01   3.043195e-01    3.305086e-02
## 322    Territory       2.462949e-01   6.943344e-03    7.467618e-01
## 323    Territory       1.033479e-01   1.810874e-04    8.964710e-01
## 324    Territory       7.432827e-03   2.443880e-06    9.925647e-01
## 325    Territory       5.468742e-03   6.927964e-06    9.945243e-01
## 326    Territory       1.870479e-03   1.612096e-06    9.981279e-01
## 327    Territory       2.149680e-01   1.045647e-02    7.745755e-01
## 328    Territory       2.498004e-01   2.820259e-02    7.219970e-01
## 329    Territory       3.534906e-01   8.651075e-03    6.378583e-01
## 330    Territory       4.730851e-01   1.218593e-02    5.147290e-01
## 331    Territory       1.073492e-01   1.557798e-03    8.910930e-01
## 332    Territory       3.633873e-01   7.711373e-02    5.594990e-01
## 333     Speckles       3.505416e-01   5.897284e-01    5.973004e-02
## 334    Territory       3.969787e-02   1.810700e-05    9.602840e-01
## 335    Territory       5.471642e-02   5.625372e-05    9.452273e-01
## 336     Speckles       4.556942e-01   4.823569e-01    6.194891e-02
## 337    Territory       2.780721e-01   9.678722e-04    7.209600e-01
## 338    Territory       8.083709e-05   6.970526e-08    9.999191e-01
## 339    Territory       4.388036e-01   2.738948e-03    5.584574e-01
## 340    Territory       1.087290e-01   8.777645e-04    8.903933e-01
## 341    Territory       2.026008e-01   3.373575e-03    7.940257e-01
## 342    Territory       3.482829e-01   1.416439e-02    6.375527e-01
## 343    Territory       2.042594e-02   5.456068e-05    9.795195e-01
## 344 Intermediate       6.741879e-01   3.812493e-02    2.876872e-01
## 345    Territory       1.455538e-02   9.943945e-05    9.853452e-01
## 346    Territory       3.218840e-02   1.242100e-04    9.676874e-01
## 347 Intermediate       7.822000e-01   8.234183e-02    1.354582e-01
## 348    Territory       2.596336e-02   1.377486e-05    9.740229e-01
## 349    Territory       4.600680e-01   5.098815e-02    4.889439e-01
## 350    Territory       3.732789e-02   3.157657e-04    9.623563e-01
## 351    Territory       3.072642e-02   1.581747e-04    9.691154e-01
## 352    Territory       2.697628e-03   3.663014e-07    9.973020e-01
## 353 Intermediate       6.101766e-01   5.527792e-02    3.345455e-01
## 354    Territory       1.376335e-01   1.904116e-03    8.604624e-01
## 355    Territory       2.999766e-01   7.563428e-02    6.243892e-01
## 356    Territory       5.960638e-02   2.643699e-04    9.401293e-01
## 357    Territory       4.369902e-02   5.189360e-05    9.562491e-01
## 358    Territory       6.147696e-02   1.287314e-04    9.383943e-01
## 359 Intermediate       6.238080e-01   6.933543e-02    3.068566e-01
## 360    Territory       4.492871e-02   4.469152e-04    9.546244e-01
## 361    Territory       1.397782e-02   1.455397e-05    9.860076e-01
## 362    Territory       3.595670e-01   1.672326e-02    6.237098e-01
## 363    Territory       2.539488e-02   3.921985e-05    9.745659e-01
## 364    Territory       2.064653e-02   1.308881e-04    9.792226e-01
## 365    Territory       3.942628e-02   2.499444e-04    9.603238e-01
## 366    Territory       1.996242e-01   3.102390e-03    7.972734e-01
## 367    Territory       1.636563e-02   1.889055e-04    9.834455e-01
## 368    Territory       7.513809e-02   1.023408e-03    9.238385e-01
## 369    Territory       5.180903e-04   1.186597e-08    9.994819e-01
## 370    Territory       2.486540e-01   8.063161e-03    7.432829e-01
## 371    Territory       9.907531e-02   9.613213e-04    8.999634e-01
## 372    Territory       4.744761e-01   4.538451e-02    4.801394e-01
## 373 Intermediate       7.337704e-01   1.404247e-01    1.258048e-01
## 374    Territory       2.188296e-04   1.864402e-08    9.997812e-01
## 375    Territory       3.536404e-01   5.892920e-03    6.404667e-01
## 376    Territory       1.610468e-01   2.251277e-03    8.367019e-01
## 377    Territory       3.216944e-02   5.447604e-05    9.677761e-01
## 378    Territory       4.476767e-02   3.581327e-04    9.548742e-01
## 379    Territory       1.573349e-01   2.387963e-03    8.402772e-01
## 380    Territory       3.712625e-01   1.275208e-02    6.159854e-01
## 381    Territory       4.518343e-02   3.884810e-04    9.544281e-01
## 382    Territory       1.527280e-02   1.369025e-05    9.847135e-01
## 383    Territory       2.257970e-01   1.575388e-02    7.584491e-01
## 384 Intermediate       6.423334e-01   4.270380e-02    3.149628e-01
## 385    Territory       4.042857e-01   3.029923e-02    5.654151e-01
## 386    Territory       1.527088e-01   1.301477e-03    8.459897e-01
## 387    Territory       3.101813e-02   2.922433e-04    9.686896e-01
## 388 Intermediate       6.578401e-01   2.105794e-02    3.211020e-01
## 389    Territory       1.776687e-01   2.544176e-04    8.220769e-01
## 390    Territory       1.569225e-01   6.918774e-04    8.423856e-01
## 391    Territory       3.805699e-02   4.007133e-05    9.619029e-01
## 392    Territory       2.368111e-02   4.522246e-05    9.762737e-01
## 393    Territory       5.340802e-02   1.046186e-04    9.464874e-01
## 394 Intermediate       5.866720e-01   1.559279e-01    2.574001e-01
## 395    Territory       1.154760e-01   1.190342e-03    8.833337e-01
## 396    Territory       3.652917e-01   9.427411e-02    5.404342e-01
## 397 Intermediate       6.446405e-01   1.052504e-01    2.501091e-01
## 398    Territory       3.726922e-02   2.890844e-05    9.627019e-01
## 399    Territory       5.043191e-02   1.816224e-04    9.493865e-01
## 400    Territory       2.061842e-03   1.921542e-07    9.979380e-01
## 401    Territory       2.663339e-02   4.469702e-05    9.733219e-01
## 402    Territory       1.505738e-01   2.109041e-03    8.473172e-01
## 403    Territory       2.157204e-03   6.985854e-07    9.978421e-01
## 404    Territory       1.293634e-01   3.313342e-03    8.673233e-01
##     res_fin.Phenotype
## 1        Intermediate
## 2        Intermediate
## 3        Intermediate
## 4        Intermediate
## 5        Intermediate
## 6        Intermediate
## 7        Intermediate
## 8        Intermediate
## 9        Intermediate
## 10       Intermediate
## 11       Intermediate
## 12       Intermediate
## 13       Intermediate
## 14       Intermediate
## 15       Intermediate
## 16       Intermediate
## 17       Intermediate
## 18       Intermediate
## 19       Intermediate
## 20       Intermediate
## 21       Intermediate
## 22       Intermediate
## 23       Intermediate
## 24       Intermediate
## 25       Intermediate
## 26       Intermediate
## 27       Intermediate
## 28       Intermediate
## 29       Intermediate
## 30       Intermediate
## 31       Intermediate
## 32       Intermediate
## 33       Intermediate
## 34       Intermediate
## 35       Intermediate
## 36       Intermediate
## 37       Intermediate
## 38       Intermediate
## 39       Intermediate
## 40       Intermediate
## 41       Intermediate
## 42       Intermediate
## 43       Intermediate
## 44       Intermediate
## 45       Intermediate
## 46       Intermediate
## 47       Intermediate
## 48       Intermediate
## 49       Intermediate
## 50       Intermediate
## 51       Intermediate
## 52       Intermediate
## 53       Intermediate
## 54       Intermediate
## 55       Intermediate
## 56       Intermediate
## 57       Intermediate
## 58       Intermediate
## 59       Intermediate
## 60       Intermediate
## 61       Intermediate
## 62       Intermediate
## 63       Intermediate
## 64       Intermediate
## 65       Intermediate
## 66       Intermediate
## 67       Intermediate
## 68       Intermediate
## 69       Intermediate
## 70       Intermediate
## 71       Intermediate
## 72       Intermediate
## 73       Intermediate
## 74       Intermediate
## 75       Intermediate
## 76       Intermediate
## 77       Intermediate
## 78       Intermediate
## 79       Intermediate
## 80       Intermediate
## 81       Intermediate
## 82       Intermediate
## 83       Intermediate
## 84       Intermediate
## 85       Intermediate
## 86       Intermediate
## 87       Intermediate
## 88       Intermediate
## 89       Intermediate
## 90       Intermediate
## 91       Intermediate
## 92       Intermediate
## 93       Intermediate
## 94       Intermediate
## 95       Intermediate
## 96       Intermediate
## 97       Intermediate
## 98       Intermediate
## 99       Intermediate
## 100      Intermediate
## 101      Intermediate
## 102      Intermediate
## 103      Intermediate
## 104      Intermediate
## 105      Intermediate
## 106      Intermediate
## 107      Intermediate
## 108      Intermediate
## 109      Intermediate
## 110      Intermediate
## 111      Intermediate
## 112      Intermediate
## 113      Intermediate
## 114      Intermediate
## 115      Intermediate
## 116      Intermediate
## 117      Intermediate
## 118      Intermediate
## 119      Intermediate
## 120      Intermediate
## 121      Intermediate
## 122      Intermediate
## 123      Intermediate
## 124      Intermediate
## 125      Intermediate
## 126      Intermediate
## 127      Intermediate
## 128      Intermediate
## 129      Intermediate
## 130      Intermediate
## 131      Intermediate
## 132      Intermediate
## 133      Intermediate
## 134      Intermediate
## 135          Speckles
## 136          Speckles
## 137          Speckles
## 138          Speckles
## 139          Speckles
## 140          Speckles
## 141          Speckles
## 142          Speckles
## 143          Speckles
## 144          Speckles
## 145          Speckles
## 146          Speckles
## 147          Speckles
## 148          Speckles
## 149          Speckles
## 150          Speckles
## 151          Speckles
## 152          Speckles
## 153          Speckles
## 154          Speckles
## 155          Speckles
## 156          Speckles
## 157          Speckles
## 158          Speckles
## 159          Speckles
## 160          Speckles
## 161          Speckles
## 162          Speckles
## 163          Speckles
## 164          Speckles
## 165          Speckles
## 166          Speckles
## 167          Speckles
## 168          Speckles
## 169          Speckles
## 170          Speckles
## 171          Speckles
## 172          Speckles
## 173          Speckles
## 174          Speckles
## 175          Speckles
## 176          Speckles
## 177          Speckles
## 178          Speckles
## 179          Speckles
## 180          Speckles
## 181          Speckles
## 182          Speckles
## 183          Speckles
## 184          Speckles
## 185          Speckles
## 186          Speckles
## 187          Speckles
## 188          Speckles
## 189          Speckles
## 190          Speckles
## 191          Speckles
## 192          Speckles
## 193          Speckles
## 194          Speckles
## 195          Speckles
## 196          Speckles
## 197          Speckles
## 198          Speckles
## 199          Speckles
## 200          Speckles
## 201          Speckles
## 202          Speckles
## 203          Speckles
## 204          Speckles
## 205          Speckles
## 206          Speckles
## 207          Speckles
## 208          Speckles
## 209          Speckles
## 210          Speckles
## 211          Speckles
## 212          Speckles
## 213          Speckles
## 214          Speckles
## 215          Speckles
## 216          Speckles
## 217          Speckles
## 218          Speckles
## 219          Speckles
## 220          Speckles
## 221          Speckles
## 222          Speckles
## 223          Speckles
## 224          Speckles
## 225          Speckles
## 226          Speckles
## 227          Speckles
## 228          Speckles
## 229          Speckles
## 230          Speckles
## 231          Speckles
## 232          Speckles
## 233          Speckles
## 234          Speckles
## 235          Speckles
## 236          Speckles
## 237          Speckles
## 238          Speckles
## 239          Speckles
## 240          Speckles
## 241          Speckles
## 242          Speckles
## 243          Speckles
## 244          Speckles
## 245          Speckles
## 246          Speckles
## 247          Speckles
## 248          Speckles
## 249          Speckles
## 250          Speckles
## 251          Speckles
## 252          Speckles
## 253          Speckles
## 254          Speckles
## 255          Speckles
## 256          Speckles
## 257          Speckles
## 258          Speckles
## 259          Speckles
## 260          Speckles
## 261          Speckles
## 262          Speckles
## 263          Speckles
## 264          Speckles
## 265          Speckles
## 266          Speckles
## 267          Speckles
## 268          Speckles
## 269          Speckles
## 270         Territory
## 271         Territory
## 272         Territory
## 273         Territory
## 274         Territory
## 275         Territory
## 276         Territory
## 277         Territory
## 278         Territory
## 279         Territory
## 280         Territory
## 281         Territory
## 282         Territory
## 283         Territory
## 284         Territory
## 285         Territory
## 286         Territory
## 287         Territory
## 288         Territory
## 289         Territory
## 290         Territory
## 291         Territory
## 292         Territory
## 293         Territory
## 294         Territory
## 295         Territory
## 296         Territory
## 297         Territory
## 298         Territory
## 299         Territory
## 300         Territory
## 301         Territory
## 302         Territory
## 303         Territory
## 304         Territory
## 305         Territory
## 306         Territory
## 307         Territory
## 308         Territory
## 309         Territory
## 310         Territory
## 311         Territory
## 312         Territory
## 313         Territory
## 314         Territory
## 315         Territory
## 316         Territory
## 317         Territory
## 318         Territory
## 319         Territory
## 320         Territory
## 321         Territory
## 322         Territory
## 323         Territory
## 324         Territory
## 325         Territory
## 326         Territory
## 327         Territory
## 328         Territory
## 329         Territory
## 330         Territory
## 331         Territory
## 332         Territory
## 333         Territory
## 334         Territory
## 335         Territory
## 336         Territory
## 337         Territory
## 338         Territory
## 339         Territory
## 340         Territory
## 341         Territory
## 342         Territory
## 343         Territory
## 344         Territory
## 345         Territory
## 346         Territory
## 347         Territory
## 348         Territory
## 349         Territory
## 350         Territory
## 351         Territory
## 352         Territory
## 353         Territory
## 354         Territory
## 355         Territory
## 356         Territory
## 357         Territory
## 358         Territory
## 359         Territory
## 360         Territory
## 361         Territory
## 362         Territory
## 363         Territory
## 364         Territory
## 365         Territory
## 366         Territory
## 367         Territory
## 368         Territory
## 369         Territory
## 370         Territory
## 371         Territory
## 372         Territory
## 373         Territory
## 374         Territory
## 375         Territory
## 376         Territory
## 377         Territory
## 378         Territory
## 379         Territory
## 380         Territory
## 381         Territory
## 382         Territory
## 383         Territory
## 384         Territory
## 385         Territory
## 386         Territory
## 387         Territory
## 388         Territory
## 389         Territory
## 390         Territory
## 391         Territory
## 392         Territory
## 393         Territory
## 394         Territory
## 395         Territory
## 396         Territory
## 397         Territory
## 398         Territory
## 399         Territory
## 400         Territory
## 401         Territory
## 402         Territory
## 403         Territory
## 404         Territory
saveRDS(df_res,file="Output_ElasticNet.rds")

Final accuracy is 81.5%. Just by quickly glancing at the output dataframe (df_res ; stored locally), intermediates appear to be the most frequently misclassified class. Further, in multiple instances territories or speckles get classifed as intermediates incorrectly. Note: Some of these misclassified cases genuinely appear to have the wrong class label - Revise class labels?

Lets try more sophisticated ML approaches.

Support Vector Machines

Test SVM with Radial Basis Function as kernel without any feature selection.

ToDo: Feature selection methods tests

filt_recipe_svm <- recipe(Phenotype~.,data=res_fin) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_nzv(all_predictors())


### Lets not eliminate correlated features unlike logistic regression this time. Lets try supervised feature selection methods  -- 
## Unfortunately, tidymodels doesn't have supervised feature selection methods incorporated yet. You need to use caret for approaches like Recursive Feature Elimination https://topepo.github.io/caret/recursive-feature-elimination.html


### Method 1 - Extract features ranked by Variable Importance from our gradient Boosted model using vip package -- NOTE THAT THIS IS JUST AN IDEA AS VIP PACKAGE NEEDS R v4.1 while mine is R v4

#res_model_xgboost_workflow_final %>% fit(res_testing) %>% vip::vi() %>% filter(Importance>0)

### Extracting feature importance from xgboosted trees is also not striaghtforward - lets just use a random forest to check 

### Lets try recipeselectors package instead -- Recursive Feature eleimination .ie. backward selection

### define basic random forest model with default parameters for feature selection to use for SVM -- BAD PRACTICE? 

res_model_svm <- svm_rbf(cost = tune(),rbf_sigma = tune(),margin = tune()) %>% set_mode("classification") %>% set_engine("kernlab")

### no feature selection ! - Lets leave them in for now - examine variable importance later -- NEED TO UPDATE R AND GET VIP PACKGE !
filt_recipe_svm <- recipe(Phenotype~.,data=res_fin) %>%
  step_scale(all_numeric_predictors()) %>%
  step_nzv(all_predictors())


res_model_svm_workflow <- workflow() %>% add_recipe(filt_recipe_svm) %>% add_model(res_model_svm) 

penalty_grid_svm <- grid_random(cost(),
                                rbf_sigma(),
                                svm_margin(),
                                size=50)

svm_grid<- tune_grid(res_model_svm_workflow,resamples = res_folds,grid=penalty_grid_svm)

autoplot(svm_grid,metric="accuracy")  ## ~ 80% accuracy for top models

Best performing models have up to 80% accuracy. This is also lower than previous tests.

best_props_svm <- svm_grid %>% select_best(metric="accuracy")

svm_model_metrics <- finalize_workflow(res_model_svm_workflow,best_props_svm) %>% fit_resamples(resamples=res_folds) %>% collect_metrics()

## Finalize workflow and return final metric on Test -- Close to train values !

res_model_svm_final <- finalize_workflow(res_model_svm_workflow,best_props_svm) %>% last_fit(res_split)   ### final fit to train-test split- 89% accuracy
res_model_svm_final %>% collect_metrics()
## # A tibble: 3 × 4
##   .metric     .estimator .estimate .config             
##   <chr>       <chr>          <dbl> <chr>               
## 1 accuracy    multiclass     0.753 Preprocessor1_Model1
## 2 roc_auc     hand_till      0.912 Preprocessor1_Model1
## 3 brier_class multiclass     0.161 Preprocessor1_Model1

Final accuracy is about 75%. Lets try Gradient Boosted Decision Trees

XGBoost Trees

Note that Xgboost has a lot of tunable parameters, so we will adopt a two stage tuning where we tune the most important parameters first (tree depth, learning rate and number of trees) and then tune the rest later.

### No need to remove correlated variables here ! Tree based models are less sensitive to this 

filt_recipe_xgboost <- recipe(Phenotype~.,data=res_fin) %>%
  step_scale(all_numeric_predictors()) %>%
  step_nzv(all_predictors())
  
## train recipe - Also optional -Note that when we add recipe object to workflow, you dont need preprocessing steps by prep and bake 
filt_prep_xgboost <- filt_recipe_xgboost %>% prep(training=res_training)

#### Define model 

### xgboost has a lot of hyperparameters ! -- Can't tune all of them at once (as it will search through higher dim. space) - Lets tune 3 parameters first
### and then tune rest in second stage for best Stage 1

res_model_xgboost_stage1 <- boost_tree(learn_rate = tune(),trees=tune(),tree_depth = tune()) %>% set_mode("classification") %>% set_engine("xgboost")

res_model_xgboost_workflow_stage1 <- workflow() %>% add_recipe(filt_recipe_xgboost) %>% add_model(res_model_xgboost_stage1) 

## go for random grid values
penalty_grid_stage1 <- grid_random(learn_rate(),
                                   trees(),
                                   tree_depth(),
                                   size=50)   ## 50 random options for optimal learning rate search 

xgboost_grid_stage1 <- tune_grid(res_model_xgboost_workflow_stage1,resamples = res_folds,grid=penalty_grid_stage1)

saveRDS(xgboost_grid_stage1,file="xgboost_grid_stage1.rds")

autoplot(xgboost_grid_stage1,metric="accuracy")  ## can see that best model tops 90% accuracy

best_props_stage1 <- xgboost_grid_stage1 %>% select_best(metric="accuracy")

###### Stage 2 of model, optimze rest of parameters with Learning rate held constant - These are slightly less important 

res_model_xgboost_stage2 <- res_model_xgboost_stage1 %>% set_args(learn_rate=best_props_stage1$learn_rate,
                                                                                    tree_depth=best_props_stage1$tree_depth,
                                                                                    trees=best_props_stage1$trees,
                                                                                    loss_reduction=tune(),
                                                                                    stop_iter=tune(),
                                                                                    min_n=tune())

res_model_xgboost_workflow_stage2 <- res_model_xgboost_workflow_stage1 %>% update_model(res_model_xgboost_stage2)


## 2nd stage tune_grid 

penalty_grid_stage2 <- grid_random(loss_reduction(),
                                   stop_iter(),
                                   min_n(),
                                   size=30) 


xgboost_grid_stage2 <- tune_grid(res_model_xgboost_workflow_stage2,resamples = res_folds,grid=penalty_grid_stage2)

autoplot(xgboost_grid_stage2,metric="accuracy")  ## ~ 91% accuracy for top models

best_props_stage2 <- xgboost_grid_stage2 %>% select_best(metric="accuracy")

###### Stage 2 of model, optimze rest of parameters with Learning rate held constant - These are slightly less important 

res_model_xgboost_final <- res_model_xgboost_stage2 %>% set_args(learn_rate=best_props_stage1$learn_rate,
                                                                                    tree_depth=best_props_stage1$tree_depth,
                                                                                    trees=best_props_stage1$trees,
                                                                                    loss_reduction=best_props_stage2$loss_reduction,
                                                                                    stop_iter=best_props_stage2$stop_iter,
                                                                                    min_n=best_props_stage2$min_n)

res_model_xgboost_workflow_final <- res_model_xgboost_workflow_stage2 %>% update_model(res_model_xgboost_final)

#### Last fit !

res_model_xgboost_lastfit <- res_model_xgboost_workflow_final %>% last_fit(res_split)

res_model_xgboost_lastfit %>% collect_metrics() 
## # A tibble: 3 × 4
##   .metric     .estimator .estimate .config             
##   <chr>       <chr>          <dbl> <chr>               
## 1 accuracy    multiclass     0.728 Preprocessor1_Model1
## 2 roc_auc     hand_till      0.875 Preprocessor1_Model1
## 3 brier_class multiclass     0.213 Preprocessor1_Model1

Using XGBoost doesn’t signficantly improve prediction accuracy with this dataset. This is unlike previous attempt where we achieved upto 95% accuracy.

In general, XGboost > SVM > logistic regression with regularization ! -

It’s also possible to use Convolutional Neural Networks as shown below, however we have insufficient images for training properly (final accuracies are in the order of 60%). Explore this in the future !

ToDo: TensorFlow on R is not as up to date as Python. Had to revert to deprecated functions (image_data_generator()). Explore training on non-augmented data as well as other CNN architectures in Python.

Sys.setenv(RETICULATE_PYTHON = "/Users/ra36doj/.virtualenvs/r-tensorflow/bin/python")


#remotes::install_github("rstudio/tensorflow")
#reticulate::install_python()
#install_tensorflow()
library(tensorflow)
#install.packages("keras")
library(keras)

#install.packages("remotes")
#remotes::install_github(sprintf("rstudio/%s", c("reticulate", "tensorflow", "keras")))
#reticulate::miniconda_uninstall() # start with a blank slate
#reticulate::install_miniconda()
#keras::install_keras()

tensorflow::as_tensor("Hello World")

list_files_TF <- list.files(path = "lowResCrop_TF/",pattern=".png$",full.names = T)

### inspire by https://shirinsplayground.netlify.app/2018/06/keras_fruits/
phenotype_list <- c("Blobs","LowSignal","Territory")
output_n <- length(phenotype_list)

source_dir <- "lowResCrop_TF"

### create directories containing train and validation data with structure https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d

#Main directory
dir.create("NeuralNetwork")

#sub directories for validation and training
dir.create("NeuralNetwork/Train")
dir.create("NeuralNetwork/Test")

## subdirectories for 3 classes within 

for (dir in c("Train","Test")){
  for(phen in phenotype_list){
    dir.create(paste0("NeuralNetwork/",dir,"/",phen))
  }
}

### Note that here we arent adjusting for class imbalance -- is not substantial -- territories (highest class) have 25% more images than blobs (lowest)

set.seed(12552)

train_indices <- sample.int(length(list_files_TF),0.75*length(list_files_TF))

### Copy files into suitable directories

i <- 1

for (i in 1:length(list_files_TF)){
    
  filename <- list_files_TF[i]

  if (i %in% train_indices){
    file.copy(filename,paste0("NeuralNetwork/Train/",gsub("_.*","",gsub("lowResCrop_TF//","",filename))))
  } else {
    file.copy(filename,paste0("NeuralNetwork/Test/",gsub("_.*","",gsub("lowResCrop_TF//","",filename))))
  }
  
}


##### Note -- Sometimes some hidden files are generated during copy paste, presumably from Fiji -- These throw an error later 
##### Delete them before proceeding : Navigate to folder NeuralNetwork folder (containing train and test folders) and execute the below command 
##### find . -name ".*" -exec rm -rf {} \;

### Image dimensions - Note that some of our images are not 20x20, can be a bit lower (edge cases) - Will they be handled properly? 

img_width <- 20
img_height <- 20
target_size <- c(img_width, img_height)

channels <- 3  #RGB

# path to image folders
train_image_files_path <- "NeuralNetwork/Train/"
valid_image_files_path <- "NeuralNetwork/Test/"


### Augment training data (but not test data!)

train_data_gen = image_data_generator(
  rescale = 1/255,
  rotation_range = 180,
  width_shift_range = 0.2,
  height_shift_range = 0.2,
  shear_range = 0.2,  ## radian value: corresponds to shear angle of max ~10 degrees
  #zoom_range = 0.2,
  horizontal_flip = TRUE,
  vertical_flip=TRUE,
  fill_mode = "nearest"
)

# Validation data shouldn't be augmented! But it should also be scaled.
valid_data_gen <- image_data_generator(
  rescale = 1/255
  )  




# training images
train_image_array_gen <- flow_images_from_directory(train_image_files_path, 
                                          train_data_gen,
                                          target_size = target_size,
                                          class_mode = "categorical",
                                          classes = phenotype_list,
                                          seed = 42)

# validation images
valid_image_array_gen <- flow_images_from_directory(valid_image_files_path, 
                                          valid_data_gen,
                                          target_size = target_size,
                                          class_mode = "categorical",
                                          classes = phenotype_list,
                                          seed = 42)


table(factor(train_image_array_gen$classes))  ## can see some class imbalance ! - lets explore later if you want to tweak here 

train_image_array_gen$class_indices ## blobs are classified as 0, LowSig as 1 and territories as 2



#### Define model 

# number of training samples
train_samples <- train_image_array_gen$n
# number of validation samples
valid_samples <- valid_image_array_gen$n

# define batch size and number of epochs
batch_size <- 32
epochs <- 10

## Model 
# initialise model
#model <- keras_model_sequential()

# add layers
# model %>%
#   ### too many output nodes? -- maybe reduce to 16 and next layer to 8?
#   layer_conv_2d(filter = 16, kernel_size = c(3,3), padding = "same", input_shape = c(img_width, img_height, channels)) %>%
#   layer_activation("relu") %>%
#   
#   # Second hidden layer
#   layer_conv_2d(filter = 8, kernel_size = c(3,3), padding = "same") %>%
#   layer_activation_leaky_relu(0.5) %>%
#   layer_batch_normalization() %>%
# 
#   # Use max pooling
#   layer_max_pooling_2d(pool_size = c(2,2)) %>%
#   layer_dropout(0.25) %>%
#   
#   # Flatten max filtered output into feature vector 
#   # and feed into dense layer
#   layer_flatten() %>%
#   layer_dense(64) %>%
#   layer_activation("relu") %>%
#   layer_dropout(0.5) %>%
# 
#   # Outputs from dense layer are projected onto output layer
#   layer_dense(output_n) %>% 
#   layer_activation("softmax")

model <- keras_model_sequential()
model %>%
  layer_flatten(input_shape = c(20, 20)) %>%
  layer_dense(units = 128, activation = 'relu') %>%
  layer_dense(units = 3, activation = 'softmax')

# compile
model %>% compile(
  loss = "categorical_crossentropy",
  optimizer = optimizer_rmsprop(),      ### using default optimizer values ! -- Need to check !
  metrics = "accuracy"
)


### fit model 


# dir.create("checkpoints")
# dir.create("logs")

hist <- model %>% fit_generator(
  # training data
  train_image_array_gen,
  
  # epochs
  steps_per_epoch = as.integer(train_samples / batch_size), 
  epochs = epochs, 
  
  # validation data
  validation_data = valid_image_array_gen,
  validation_steps = as.integer(valid_samples / batch_size),
  
  # print progress
  verbose = 2,
  callbacks = list(
    # save best model after every epoch
    callback_model_checkpoint("/Users/ra36doj/Desktop/mount/DeepLearning/ImageClassifier/240420_test/checkpoints", save_best_only = TRUE),
    # only needed for visualising with TensorBoard
    callback_tensorboard(log_dir = "/Users/ra36doj/Desktop/mount/DeepLearning/ImageClassifier/240420_test/logs")
  )
)

For now, go with xgboost model to get preliminary predictions for final dataset.

First display images from each genotype (12 total) that we need to classify.

#newData <- list.files(path = "TestImages/",pattern=".tif$",full.names = T)
newData <- list.files(path = "Images_classify/",pattern=".tif$",full.names = T)

## 6 unique categories - 
#newData_categories <- unique(gsub("_\\d+_RGB.*\\.tif$","",gsub("TestImages//Default.*_67_","",newData)))
#newData_categories <- unique(gsub("_\\d+_RGB.*\\.tif$","",gsub("Images_classify//dapi_gfp_.*","",newData)))
newData_categories <- unique(gsub("^_","",regmatches(newData, regexpr("_[^_]+_[^_]+_[^_]+(?=_\\d+_\\d+_RGB\\.tif$)", newData, perl=TRUE))))



## Display 6 nuclei from each cateogory

image_raw <- lapply(list_files, function(i) readTIFF(i,as.is = T)[,,2])

i <- 1
j <- 1

set.seed(12554)

par(mfrow=c(2,3)) ## adjust margins based on number of images
for(i in 1:length(newData_categories)){
  
  my_category_images <- newData[grep(newData_categories[i],newData)]
  my_category_images_sub <- my_category_images[sample.int(length(my_category_images),6)]
  
  for (j in 1:6){
   image(readTIFF(my_category_images_sub[j],as.is=T)[,,2],col=gray.colors(256), main=paste(newData_categories[i],"_",j),asp=1)
   axis(1,0:1,0:1) 
  }
  
  
}

Calculate features and predict.

res_method_newData <- as.data.frame(matrix(nrow=length(newData),ncol=46))

i <- 1

for (i in 1:length(newData)){
  
  y <- readImage(newData[i])[,,2]
  #EBImage::display(y)
  
  #hist(as.vector(imageData(y)))  #### for visualization purposes in test cases -- Our background is around 0.15-0.2 scaled intensity 
  
  
  #### Compute Haralick features for entire image
  
  x <- Image(1,dim=dim(y))   
  
  res_method_newData[i,1:26] <- colMeans(computeFeatures.haralick(x, y))

  
  ### Add mean intensity of image
  
  res_method_newData[i,27] <- mean(imageData(y))
  
  ### Additional object dependent features 
  
  ### explore thresholding for object features 

  ## Local background based thresholding to generate binary image
  y_2 <- gblur(y,sigma=0.5)  ### mild blurring - sigma must be selected appropriately !
  
  #EBImage::display(y_2,all=T)

  cutoff <- 0.3 ## background cutoff selected based on histogram      
  x <- y_2 > cutoff 
  
  #EBImage::display(x,all=T)
  
  #if no objects detected, then object feature is 0
  if(sum(imageData(x))==0){
    res_method_newData[i,28:46] <- 0
  } else {
    res_method_newData[i,28:46] <- c(computeFeatures.moment(x,y), computeFeatures.shape(x), computeFeatures.basic(x,y))

  }

  rownames(res_method_newData)[i] <- gsub("_RGB.tif","",gsub("Images_classify//","",newData[i]))
}

df_res_newData <- data.frame(res_model_xgboost_lastfit %>% extract_workflow() %>% predict(res_method_newData),res_model_xgboost_lastfit %>% extract_workflow() %>% predict(res_method_newData,type="prob"),rownames(res_method_newData))  

###### Predictions look good for single cells, but a fraction of images have >1 nuclei in field of view - This throws off predictions more in 
##### favour of territories as its localized in some sense

df_res_newData <- df_res_newData %>% mutate(Genotype=case_when(grepl(newData_categories[1],rownames.res_method_newData.)~newData_categories[1],
                                                               grepl(newData_categories[2],rownames.res_method_newData.)~newData_categories[2],
                                                               grepl(newData_categories[3],rownames.res_method_newData.)~newData_categories[3],
                                                               grepl(newData_categories[4],rownames.res_method_newData.)~newData_categories[4],
                                                               grepl(newData_categories[5],rownames.res_method_newData.)~newData_categories[5],
                                                               grepl(newData_categories[6],rownames.res_method_newData.)~newData_categories[6],
                                                               grepl(newData_categories[7],rownames.res_method_newData.)~newData_categories[7],
                                                               grepl(newData_categories[8],rownames.res_method_newData.)~newData_categories[8],
                                                               grepl(newData_categories[9],rownames.res_method_newData.)~newData_categories[9],
                                                               grepl(newData_categories[10],rownames.res_method_newData.)~newData_categories[10],
                                                               grepl(newData_categories[11],rownames.res_method_newData.)~newData_categories[11],
                                                               grepl(newData_categories[12],rownames.res_method_newData.)~newData_categories[12]))
                                                               
                                                               
saveRDS(df_res_newData,"df_res_newData.rds")

head(df_res_newData)
##    .pred_class .pred_Intermediate .pred_Speckles .pred_Territory
## 1    Territory        0.001845192   0.0003469431    9.978079e-01
## 2     Speckles        0.011749393   0.9881930947    5.749628e-05
## 3 Intermediate        0.939707696   0.0530534349    7.238836e-03
## 4     Speckles        0.005026811   0.9822224379    1.275078e-02
## 5 Intermediate        0.985535383   0.0082727019    6.191989e-03
## 6     Speckles        0.055980500   0.9434951544    5.243666e-04
##                    rownames.res_method_newData.              Genotype
## 1      dapi_gfp_0001-0018_rox2ko17_gfp_rox2_1_0     rox2ko17_gfp_rox2
## 2    dapi_gfp_0001-0021_rox2ko112_gfp_A3B0_1_18    rox2ko112_gfp_A3B0
## 3 dapi_gfp_0001-0021_rox2ko112_gfp_rox1d3f_1_20 rox2ko112_gfp_rox1d3f
## 4   dapi_gfp_0001-0022_rox2ko17_gfp_minirox_1_2  rox2ko17_gfp_minirox
## 5    dapi_gfp_0001-0023_rox2ko112_gfp_A3B0_2_19    rox2ko112_gfp_A3B0
## 6    dapi_gfp_0001-0023_rox2ko112_gfp_rox2_1_12    rox2ko112_gfp_rox2
df_res_newData %>% select(Genotype,.pred_class) %>% group_by(Genotype) %>% count(.pred_class) %>% mutate(Fraction=n/sum(n)) %>%
    ggplot(aes(x=factor(Genotype),y=Fraction,fill=.pred_class)) + geom_col(width=0.75) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))

While the accuracy can be improved in the future, preliminary results already give us an indication of trends. Clearly full length rox2 generates the most territories. A0B0 and rox1d3c produces a mix of intermediates and territories while the rest are significantly worse at territory formation (.ie. lead to speckled phenotype)

Inspecting emission probabilities of example images gives some ideas on how to improve. In summary, to improve: 1) Improved class labels for training data -> Some images geniunely appear to have wrong labels 2) Improved image resolution -> In previous test, images were acquired at twice the resolution compared to this time. Often, not enough pixels cover speckles. Territories appear grainy as well. 3) Better preprocessing pipeline -> In multiple cases, we have poor crops where only a part of the nuclei is in the image. Some images have multiple cells. Need to eliminate these images in preprocess pipeline